## Table A5

#directory
#set working directory to the path PvP_Replication
#setwd("~/PvP_Replication")

#packages
library(tidyverse) #for data cleaning and manipulation
library(kbal) #for calculating kernel balancing weights
library(cobalt) #assess balance
library(xtable) #simple tables in latex
library(estimatr) #estimation

#load main dataset
pvp_data <- read.csv("PvP_data/PvP_data_main.csv")


#transform independent variable
#create binary (hectares > 100) cultivation variable
pvp_data <- pvp_data %>% 
  mutate(cultbinary = ifelse(maxcult > 100, 1, 0))

#balancing model

#transform selected covariates
#create variable for highways per square km, percent of population in rural zones, log population
pvp_data <- pvp_data %>% 
  mutate(hwycover = (hwylengthkm/sqkm), ruralpct = (ruralpop14/pop14)*100, logpop = log(pop14))

#list of covariates for kernel balancing
covariate_list <- c("lit_ratio", "electric_ratio", "logpop", "ruralpct", "elev_stdev", "hwycover", "distCali", "distBogota", "distMedellin")

#subset data to FARC municipalities for main anaylses
pvp_farc_subset <- pvp_data %>% filter(farc_presence == 1)

#note that there are two cases missing covariates
pvp_farc_subset %>% 
  dplyr::select(municode, all_of(covariate_list), dissident_presence, cultbinary) %>% 
  filter(!complete.cases(.)) %>% 
  count()

#drop the two incomplete cases
pvp_farc_subset <- pvp_farc_subset %>% 
  filter(complete.cases(pvp_farc_subset[ , c(covariate_list, 'dissident_presence', 'cultbinary')]))

#calculate weights using kernel balancing as in main specification
pvp_farc_subset$kbalweights <- kbal(allx = pvp_farc_subset[,covariate_list],
                                    treatment = pvp_farc_subset$cultbinary,
                                    fullSVD = TRUE, meanfirst = F)$w


#calculate weights with varied beta parameters
pvp_farc_subset$kbalb5 <- kbal(allx = pvp_farc_subset[,covariate_list],
                                    treatment = pvp_farc_subset$cultbinary, b = 5,
                                    fullSVD = TRUE, meanfirst = FALSE, mixed_data = F, printprogress = FALSE)$w 

pvp_farc_subset$kbalb2.5 <- kbal(allx = pvp_farc_subset[,covariate_list],
                                      treatment = pvp_farc_subset$cultbinary, b = 2.5,
                                      fullSVD = TRUE, meanfirst = FALSE, mixed_data = F, printprogress = FALSE)$w 

pvp_farc_subset$kbalb10 <- kbal(allx = pvp_farc_subset[,covariate_list],
                                     treatment = pvp_farc_subset$cultbinary, b = 10,
                                     fullSVD = TRUE, meanfirst = FALSE, mixed_data = F, printprogress = FALSE)$w 

#empty dataframe for results
betatest <- data.frame(beta = rep(NA, 4), estimate = rep(NA, 4), ESS = rep(NA, 4), F_omni = rep(NA, 4))

#values of beta tested
betatest$beta <- c(15.37132, 10, 5, 2.5)

#effective sample sizes
betatest$ESS = c(cobalt::bal.tab(x = cultbinary ~ lit_ratio + electric_ratio + logpop + ruralpct + elev_stdev + hwycover + distCali + distBogota + distMedellin, treat = "cultbinary", data = pvp_farc_subset, weights = "kbalweights")[["Observations"]][["Control"]][2], 
                 cobalt::bal.tab(x = cultbinary ~ lit_ratio + electric_ratio + logpop + ruralpct + elev_stdev + hwycover + distCali + distBogota + distMedellin, treat = "cultbinary", data = pvp_farc_subset, weights = "kbalb10")[["Observations"]][["Control"]][2], 
                 cobalt::bal.tab(x = cultbinary ~ lit_ratio + electric_ratio + logpop + ruralpct + elev_stdev + hwycover + distCali + distBogota + distMedellin, treat = "cultbinary", data = pvp_farc_subset, weights = "kbalb5")[["Observations"]][["Control"]][2], 
                 cobalt::bal.tab(x = cultbinary ~ lit_ratio + electric_ratio + logpop + ruralpct + elev_stdev + hwycover + distCali + distBogota + distMedellin, treat = "cultbinary", data = pvp_farc_subset, weights = "kbalb2.5")[["Observations"]][["Control"]][2])


#estimates
betatest$estimate <- c(
  lm_robust(dissident_presence ~ cultbinary, pvp_farc_subset, weights = kbalweights)$coefficients[2], 
  lm_robust(dissident_presence ~ cultbinary, pvp_farc_subset, weights = kbalb10)$coefficients[2],
  lm_robust(dissident_presence ~ cultbinary, pvp_farc_subset, weights = kbalb5)$coefficients[2],
  lm_robust(dissident_presence ~ cultbinary, pvp_farc_subset, weights = kbalb2.5)$coefficients[2]
)

#f-statistics
betatest$F_omni <- c(
  summary(lm_robust(cultbinary ~ lit_ratio + elev_stdev + ruralpct + hwycover + logpop + electric_ratio + electric_ratio + distCali + distBogota + distMedellin, pvp_farc_subset, weights = kbalweights))$fstatistic[["value"]], 
  summary(lm_robust(cultbinary ~ lit_ratio + elev_stdev + ruralpct + hwycover + logpop + electric_ratio + electric_ratio + distCali + distBogota + distMedellin, pvp_farc_subset, weights = kbalb10))$fstatistic[["value"]], 
  summary(lm_robust(cultbinary ~ lit_ratio + elev_stdev + ruralpct + hwycover + logpop + electric_ratio + electric_ratio + distCali + distBogota + distMedellin, pvp_farc_subset, weights = kbalb5))$fstatistic[["value"]], 
  summary(lm_robust(cultbinary ~ lit_ratio + elev_stdev + ruralpct + hwycover + logpop + electric_ratio + electric_ratio + distCali + distBogota + distMedellin, pvp_farc_subset, weights = kbalb2.5))$fstatistic[["value"]]
)


#create table of results

#rename columns
colnames(betatest) <- c("Bandwidth", "Estimate of Y ~ D", "Effective Sample Size (Control)", "Omnibus F-stat for D ~ X")

#print result
print(xtable(betatest), include.rownames = FALSE)

